import plotly.express as px
import hvplot.pandas
import holoviews as hv
import plotly
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from holoviews.operation.datashader import datashade
from datetime import datetime, timezone
from plotly.subplots import make_subplots
hvplot.extension('plotly')
import plotly.io as pio
pio.renderers.default = "notebook"
# Load the raw activity log and order rows by when transaction processing began.
df = pd.read_csv('./renamed.csv', engine='python')
sorted_df = df.sort_values(by='act_transaction_processing_beginning')

# Bounds of the analysis window as UTC-aware datetimes (both ends are inclusive below).
timestamp_format = '%Y-%m-%d %H:%M:%S.%f'
start_time = datetime.strptime("2021-04-12 20:22:41.0000", timestamp_format).replace(tzinfo=timezone.utc)
end_time = datetime.strptime("2021-04-12 20:23:50.0000", timestamp_format).replace(tzinfo=timezone.utc)

# Parse the start timestamps into a helper column, then keep only rows inside the window.
# NOTE(review): the bounds are tz-aware; presumably the parsed column is too - confirm against the CSV.
beginning_datetimes = pd.to_datetime(sorted_df['act_transaction_processing_beginning'], format=timestamp_format)
sorted_df['beginning_datetime_filter'] = beginning_datetimes
in_window = (sorted_df['beginning_datetime_filter'] >= start_time) & (sorted_df['beginning_datetime_filter'] <= end_time)
# .copy() so later column assignments do not write into a slice of the unfiltered frame.
sorted_df = sorted_df[in_window].copy()
def plot_histogram(sorted_df, column_name, title='', xlabel='', ylabel=''):
    """Draw a histogram of one column of ``sorted_df``.

    Args:
        sorted_df: Table holding the data; ``sorted_df[column_name]`` must
            expose the ``.hvplot`` accessor.
        column_name: Name of the column to bin.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis. It is only forwarded when non-empty, so
            callers that omit it get exactly the options they got before.

    Returns:
        Whatever ``.hvplot.hist`` returns for the column.
    """
    options = {'title': title, 'xlabel': xlabel}
    if ylabel:
        # The parameter used to be accepted but silently dropped.
        options['ylabel'] = ylabel
    return sorted_df[column_name].hvplot.hist(**options)
def plot_histogram_log(sorted_df, column_name, title='', xlabel='', ylabel=''):
    """Draw a histogram of a column's strictly positive values with a log y axis.

    Args:
        sorted_df: Table holding the data; ``sorted_df[column_name]`` must
            support ``> 0`` comparison, boolean-mask indexing and ``.hvplot``.
        column_name: Name of the column to bin.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis (forwarded as given, including ``''``).

    Returns:
        Whatever ``.hvplot.hist`` returns for the filtered values.
    """
    column = sorted_df[column_name]
    # A single mask taken from the same series it is applied to: missing values
    # compare False against 0, so they are removed together with the
    # non-positive ones, and no index realignment between a dropna()'d series
    # and a full-length mask is needed.
    positive = column[column > 0]
    return positive.hvplot.hist(logy=True, title=title, xlabel=xlabel, ylabel=ylabel)
def plot_boxplot(sorted_df, column_name, title='', label=''):
    """Draw a box plot of the non-missing values of one column.

    Args:
        sorted_df: Table holding the data.
        column_name: Name of the column to summarise.
        title: Plot title.
        label: Forwarded to ``.hvplot.box`` as ``label``.

    Returns:
        Whatever ``.hvplot.box`` returns.
    """
    observed = sorted_df[column_name].dropna()
    return observed.hvplot.box(title=title, label=label)
def plot_boxplot_log(sorted_df, column_name, title='', label=''):
    """Draw a box plot of a column's strictly positive values with a log y axis.

    The input table is left untouched. Earlier this function wrote NaN over
    every non-positive value in ``sorted_df[column_name]``, which changed what
    every later plot of the same column saw.

    Args:
        sorted_df: Table holding the data.
        column_name: Name of the column to summarise.
        title: Plot title.
        label: Forwarded to ``.hvplot.box`` as both ``label`` and ``ylabel``.

    Returns:
        Whatever ``.hvplot.box`` returns for the filtered values.
    """
    column = sorted_df[column_name]
    # where() blanks out non-positive (and missing) entries on a new series;
    # dropna() then removes them, so only values > 0 reach the log axis.
    positive = column.where(column > 0).dropna()
    return positive.hvplot.box(logy=True, title=title, label=label, ylabel=label)
def plot_scatter(sorted_df, x_column, y_column, title='', xlabel='', ylabel=''):
    """Plot ``y_column`` against ``x_column`` and apply title and axis labels.

    Despite the function name, the plot is requested with ``kind='line'``.

    Args:
        sorted_df: Table exposing a callable ``.hvplot`` accessor.
        x_column: Column used for the x axis.
        y_column: Column used for the y axis.
        title: Plot title (passed to both the plot call and ``opts``).
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        The plot object returned by ``.opts``.
    """
    label_options = dict(title=title, xlabel=xlabel, ylabel=ylabel)
    curve = sorted_df.hvplot(x=x_column, y=y_column, kind='line', title=title)
    return curve.opts(**label_options)
def plot_scatter_log(sorted_df, x_column, y_column, title='', xlabel='', ylabel=''):
    """Plot ``y_column`` against ``x_column`` on a log-scaled y axis.

    Despite the function name, the plot is requested with ``kind='line'``.

    Args:
        sorted_df: Table exposing a callable ``.hvplot`` accessor.
        x_column: Column used for the x axis.
        y_column: Column used for the y axis.
        title: Plot title (passed to both the plot call and ``opts``).
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        The plot object returned by ``.opts``.
    """
    label_options = dict(title=title, xlabel=xlabel, ylabel=ylabel)
    curve = sorted_df.hvplot(x=x_column, y=y_column, kind='line', title=title, logy=True)
    return curve.opts(**label_options)
def plot_scatter_shade(sorted_df, x_column, y_column, title='', xlabel='', ylabel=''):
    """Plot ``y_column`` against integer-coded ``x_column`` with datashading on.

    The x values are replaced by their ``pd.factorize`` codes before plotting.
    That replacement is done on a copy: earlier it was written back into
    ``sorted_df``, so after the first call the caller's x column held integer
    codes instead of its original values for every later plot.

    Args:
        sorted_df: DataFrame holding the data; it is not modified.
        x_column: Column whose values are factorized and used for the x axis.
        y_column: Column used for the y axis.
        title: Plot title (passed to both the plot call and ``opts``).
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        The plot object returned by ``.opts``.
    """
    shaded_df = sorted_df.copy()
    # factorize() returns (codes, uniques); only the integer codes are plotted.
    shaded_df[x_column] = pd.factorize(sorted_df[x_column])[0]
    shaded = shaded_df.hvplot(x=x_column, y=y_column, kind='line', title=title, datashade=True)
    return shaded.opts(title=title, xlabel=xlabel, ylabel=ylabel)
def plot_combined_scatter(sorted_df, x_column, y_columns, y_labels, titles, xlabel='', ylabel='', width=800):
    """Build one curve per y column and stack them in a single-column layout.

    Args:
        sorted_df: Table holding the data.
        x_column: Column shared by every curve as the x axis.
        y_columns: Columns to plot, one curve each.
        y_labels: Curve labels, paired with ``y_columns`` by position.
        titles: Per-curve titles, looked up by the curve's position.
        xlabel: Label for the x axis of every curve.
        ylabel: Label for the y axis of every curve.
        width: Width option applied to every curve.

    Returns:
        The ``hv.Layout`` of all curves, one column wide, with
        ``shared_axes=True``.
    """
    x_values = sorted_df[x_column]
    curves = [
        hv.Curve((x_values, sorted_df[y_name]), label=curve_label).opts(
            title=titles[position], xlabel=xlabel, ylabel=ylabel, width=width
        )
        for position, (y_name, curve_label) in enumerate(zip(y_columns, y_labels))
    ]
    stacked = hv.Layout(curves).cols(1)
    return stacked.opts(shared_axes=True)
# TransactionProcessing: distribution and time-series plots of the activity duration.
tp_duration_col = 'act_transaction_processing_duration_ms'
tp_begin_col = 'act_transaction_processing_beginning'
tp_begin_label = 'Beginning of Transaction Processing Activity'
tp_duration_label = 'Duration of Transaction Processing (ms)'
plot_histogram(sorted_df, tp_duration_col, title=tp_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, tp_duration_col, title='Duration of TransactionProcessing activity (log)', xlabel='Duration (ms)')
plot_boxplot(sorted_df, tp_duration_col, title=tp_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, tp_duration_col, title='Duration of TransactionProcessing (log)', label='Duration (ms)')
plot_scatter(sorted_df, tp_begin_col, tp_duration_col, title='Duration timeseries of TransactionProcessing', xlabel=tp_begin_label, ylabel=tp_duration_label)
plot_scatter_log(sorted_df, tp_begin_col, tp_duration_col, title='Duration timeseries of TransactionProcessing (log)', xlabel=tp_begin_label, ylabel=tp_duration_label)
plot_scatter_shade(sorted_df, tp_begin_col, tp_duration_col, title='Duration timeseries of TransactionProcessing (datashading)', xlabel=tp_begin_label, ylabel=tp_duration_label)
# Looks like captured (truncated) notebook cell output rather than code; kept as a comment so the file parses:
# BokehModel(combine_events=True, render_bundle={'docs_json': {'e6c225d8-ebd1-4e08-8fe6-15c71673f9d9': {'version…
# TransactionProcessing versus its sub-activities.
parent_duration = ['act_transaction_processing_duration_ms']
children_durations = ['act_awaiting_endorsement_duration_ms', 'act_awaiting_ordering_and_validation_duration_ms']
cols = parent_duration + children_durations

# Parallel-coordinates plot over a 1% random sample; every row gets the same constant class value.
df2 = sorted_df.assign(**{'class': 'Parent activity and sub-activities duration (ms)'}).sample(frac=0.01)
hvplot.plotting.parallel_coordinates(df2, class_column='class', cols=cols, width=800)

# One duration curve per activity (parent first), built from the full frame.
plot_combined_scatter(
    sorted_df,
    'act_transaction_processing_beginning',
    cols,
    ['Sequential Activity', ' Duration of AwaitingEndorsement (ms)', ' Duration of AwaitingOrderingAndValidation (ms)'],
    ['TransactionProcessing', 'TransactionProcessing - AwaitingEndorsement', 'TransactionProcessing - AwaitingOrderingAndValidation'],
    xlabel='Beginning of Transaction Processing Activity',
    ylabel='Duration (ms)',
)
# Box plots: the parent activity and each sub-activity in its own subplot, sharing the y axis.
parent_duration = ['act_transaction_processing_duration_ms']
children_durations = ['act_awaiting_endorsement_duration_ms', 'act_awaiting_ordering_and_validation_duration_ms']
children_labels = [' Duration of AwaitingEndorsement (ms)', ' Duration of AwaitingOrderingAndValidation (ms)']
cols = parent_duration + children_durations
# Display label for every plotted column, parent first.
labels = dict(zip(cols, ['Duration of Transaction Processing (ms)'] + children_labels))
# Long format: one row per (variable, value) pair.
df_to_plot = sorted_df[cols].melt()
fig = make_subplots(rows=1, cols=3, shared_yaxes=True)
for i, col in enumerate(df_to_plot['variable'].unique(), 1):
    box_values = df_to_plot.loc[df_to_plot['variable'] == col, 'value']
    # The x label is repeated once per observation so the box trace gets a matching x for every y.
    x_values = [labels[col]] * len(box_values)
    fig.add_trace(go.Box(y=box_values, x=x_values, name=labels[col]), row=1, col=i)
fig.update_layout(
    title="Comparision between sub-activites of Transaction Processing Activity",
    xaxis_title="Variables",
    yaxis_title="Duration (ms)",
    legend=dict(bgcolor='white'),
    plot_bgcolor='white',
)
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_yaxes(showline=False, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.show()
# AwaitingEndorsement: distribution and time-series plots of the activity duration.
ae_duration_col = 'act_awaiting_endorsement_duration_ms'
ae_begin_col = 'act_awaiting_endorsement_beginning'
ae_begin_label = 'Beginning of Awaiting Endorsement Activity'
ae_duration_label = 'Duration of Awaiting Endorsement (ms)'
plot_histogram(sorted_df, ae_duration_col, title=ae_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, ae_duration_col, title='Duration of AwaitingEndorsement activity (log)', xlabel='Duration (ms)')
# Looks like a captured notebook warning rather than code; kept as a comment so the file parses:
# c:\Users\noori\anaconda3\envs\HLF-Jupyter\lib\site-packages\holoviews\plotting\plotly\element.py:480: RuntimeWarning: invalid value encountered in log10
plot_boxplot(sorted_df, ae_duration_col, title=ae_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, ae_duration_col, title='Duration of AwaitingEndorsement (log)', label='Duration (ms)')
plot_scatter(sorted_df, ae_begin_col, ae_duration_col, title='Duration timeseries of AwaitingEndorsement', xlabel=ae_begin_label, ylabel=ae_duration_label)
plot_scatter_log(sorted_df, ae_begin_col, ae_duration_col, title='Duration timeseries of AwaitingEndorsement (log)', xlabel=ae_begin_label, ylabel=ae_duration_label)
plot_scatter_shade(sorted_df, ae_begin_col, ae_duration_col, title='Duration timeseries of AwaitingEndorsement (datashading)', xlabel=ae_begin_label, ylabel=ae_duration_label)
# Looks like captured (truncated) notebook cell output rather than code; kept as a comment so the file parses:
# BokehModel(combine_events=True, render_bundle={'docs_json': {'e82c1ac2-e210-4ff7-8bf5-33394659023c': {'version…
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# EndorsementPeer1: distribution and time-series plots of the activity duration.
ep_duration_col = 'act_endorsement_duration_ms'
ep_begin_col = 'act_endorsement_beginning'
ep_begin_label = 'Beginning of Endorsment Peer1'
ep_duration_label = 'Duation of Endorsement Peer (ms)'
plot_histogram(sorted_df, ep_duration_col, title=ep_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, ep_duration_col, title='Duration of EndorsementPeer1 activity (log)', xlabel='Duration (ms)')
# Looks like a captured notebook warning rather than code; kept as a comment so the file parses:
# c:\Users\noori\anaconda3\envs\HLF-Jupyter\lib\site-packages\holoviews\plotting\plotly\element.py:480: RuntimeWarning: invalid value encountered in log10
plot_boxplot(sorted_df, ep_duration_col, title=ep_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, ep_duration_col, title='Duration of EndorsementPeer1 (log)', label='Duration (ms)')
plot_scatter(sorted_df, ep_begin_col, ep_duration_col, title='Duration timeseries of EndorsementPeer1', xlabel=ep_begin_label, ylabel=ep_duration_label)
plot_scatter_log(sorted_df, ep_begin_col, ep_duration_col, title='Duration timeseries of EndorsementPeer1 (log)', xlabel=ep_begin_label, ylabel=ep_duration_label)
plot_scatter_shade(sorted_df, ep_begin_col, ep_duration_col, title='Duration timeseries of EndorsementPeer1 (datashading)', xlabel=ep_begin_label, ylabel=ep_duration_label)
# Looks like captured (truncated) notebook cell output rather than code; kept as a comment so the file parses:
# BokehModel(combine_events=True, render_bundle={'docs_json': {'389758ad-c4d5-48f6-8a50-85082fffa3bf': {'version…
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
#Unable to generate Scatter Plot using Datashading because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# AwaitingOrderingAndValidation: distribution and time-series plots of the activity duration.
aov_duration_col = 'act_awaiting_ordering_and_validation_duration_ms'
aov_begin_col = 'act_awaiting_ordering_and_validation_beginning'
aov_begin_label = 'Beginning of Awaiting Ordering and Validation Activity'
aov_duration_label = 'Duration of Awaiting Ordering and Validation (ms)'
plot_histogram(sorted_df, aov_duration_col, title=aov_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, aov_duration_col, title='Duration of AwaitingOrderingAndValidation activity (log)', xlabel='Duration (ms)')
plot_boxplot(sorted_df, aov_duration_col, title=aov_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, aov_duration_col, title='Duration of AwaitingOrderingAndValidation (log)', label='Duration (ms)')
plot_scatter(sorted_df, aov_begin_col, aov_duration_col, title='Duration timeseries of AwaitingOrderingAndValidation', xlabel=aov_begin_label, ylabel=aov_duration_label)
plot_scatter_log(sorted_df, aov_begin_col, aov_duration_col, title='Duration timeseries of AwaitingOrderingAndValidation (log)', xlabel=aov_begin_label, ylabel=aov_duration_label)
plot_scatter_shade(sorted_df, aov_begin_col, aov_duration_col, title='Duration timeseries of AwaitingOrderingAndValidation (datashading)', xlabel=aov_begin_label, ylabel=aov_duration_label)
# Looks like captured (truncated) notebook cell output rather than code; kept as a comment so the file parses:
# BokehModel(combine_events=True, render_bundle={'docs_json': {'1d334a53-67a1-4222-b447-b25052e60d76': {'version…
# AwaitingOrderingAndValidation versus its sub-activities.
parent_duration = ['act_awaiting_ordering_and_validation_duration_ms']
children_durations = ['act_block_inclusion_duration_ms', 'act_awaiting_validation_and_commit_duration_ms']
cols = parent_duration + children_durations

# Parallel-coordinates plot over a 1% random sample; every row gets the same constant class value.
df2 = sorted_df.assign(**{'class': 'Parent activity and sub-activities duration (ms)'}).sample(frac=0.01)
hvplot.plotting.parallel_coordinates(df2, class_column='class', cols=cols, width=800)

# One duration curve per activity (parent first), built from the full frame.
plot_combined_scatter(
    sorted_df,
    'act_awaiting_ordering_and_validation_beginning',
    cols,
    ['Sequential Activity', ' Duration of BlockInclusionOrdere0 (ms)', ' Duration of AwaitingValidation (ms)'],
    ['AwaitingOrderingAndValidation', 'AwaitingOrderingAndValidation - BlockInclusionOrdere0', 'AwaitingOrderingAndValidation - AwaitingValidation'],
    xlabel='Beginning of Awaiting Ordering and Validation Activity',
    ylabel='Duration (ms)',
)
# Box plots: the parent activity and each sub-activity in its own subplot, sharing the y axis.
parent_duration = ['act_awaiting_ordering_and_validation_duration_ms']
children_durations = ['act_block_inclusion_duration_ms', 'act_awaiting_validation_and_commit_duration_ms']
children_labels = [' Duration of BlockInclusionOrdere0 (ms)', ' Duration of AwaitingValidation (ms)']
cols = parent_duration + children_durations
# Display label for every plotted column, parent first.
labels = dict(zip(cols, ['Duration of Awaiting Ordering and Validation (ms)'] + children_labels))
# Long format: one row per (variable, value) pair.
df_to_plot = sorted_df[cols].melt()
# One subplot per plotted column instead of a hard-coded count.
fig = make_subplots(rows=1, cols=len(cols), shared_yaxes=True)
for i, col in enumerate(df_to_plot['variable'].unique(), 1):
    box_values = df_to_plot.loc[df_to_plot['variable'] == col, 'value']
    # The x label is repeated once per observation so the box trace gets a matching x for every y.
    x_values = [labels[col]] * len(box_values)
    fig.add_trace(go.Box(y=box_values, x=x_values, name=labels[col]), row=1, col=i)
# The title names this section's parent activity; it previously still said
# "Transaction Processing Activity", carried over from the earlier figure.
fig.update_layout(
    title="Comparision between sub-activites of Awaiting Ordering and Validation Activity",
    xaxis_title="Variables",
    yaxis_title="Duration (ms)",
    legend=dict(bgcolor='white'),
    plot_bgcolor='white',
)
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_yaxes(showline=False, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.show()
# BlockInclusionOrdere0: distribution and time-series plots of the activity duration.
bi_duration_col = 'act_block_inclusion_duration_ms'
bi_begin_col = 'act_block_inclusion_beginning'
bi_begin_label = 'Beginning of Block Inclusion Activity'
bi_duration_label = 'Duration of Block Inclusion (ms)'
plot_histogram(sorted_df, bi_duration_col, title=bi_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, bi_duration_col, title='Duration of BlockInclusionOrdere0 activity (log)', xlabel='Duration (ms)')
plot_boxplot(sorted_df, bi_duration_col, title=bi_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, bi_duration_col, title='Duration of BlockInclusionOrdere0 (log)', label='Duration (ms)')
plot_scatter(sorted_df, bi_begin_col, bi_duration_col, title='Duration timeseries of BlockInclusionOrdere0', xlabel=bi_begin_label, ylabel=bi_duration_label)
plot_scatter_log(sorted_df, bi_begin_col, bi_duration_col, title='Duration timeseries of BlockInclusionOrdere0 (log)', xlabel=bi_begin_label, ylabel=bi_duration_label)
plot_scatter_shade(sorted_df, bi_begin_col, bi_duration_col, title='Duration timeseries of BlockInclusionOrdere0 (datashading)', xlabel=bi_begin_label, ylabel=bi_duration_label)
# Looks like captured (truncated) notebook cell output rather than code; kept as a comment so the file parses:
# BokehModel(combine_events=True, render_bundle={'docs_json': {'bfd23524-9281-4573-8aab-9628a6d4956f': {'version…
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# AwaitingValidation: distribution and time-series plots of the activity duration.
av_duration_col = 'act_awaiting_validation_and_commit_duration_ms'
av_begin_col = 'act_awaiting_validation_and_commit_beginning'
av_begin_label = 'Beginning of Awaiting Validation and Commit'
av_duration_label = 'Duration of Awaiting Validation and Commit (ms)'
plot_histogram(sorted_df, av_duration_col, title=av_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, av_duration_col, title='Duration of AwaitingValidation activity (log)', xlabel='Duration (ms)')
# Looks like a captured notebook warning rather than code; kept as a comment so the file parses:
# c:\Users\noori\anaconda3\envs\HLF-Jupyter\lib\site-packages\holoviews\plotting\plotly\element.py:480: RuntimeWarning: invalid value encountered in log10
plot_boxplot(sorted_df, av_duration_col, title=av_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, av_duration_col, title='Duration of AwaitingValidation (log)', label='Duration (ms)')
plot_scatter(sorted_df, av_begin_col, av_duration_col, title='Duration timeseries of AwaitingValidation', xlabel=av_begin_label, ylabel=av_duration_label)
plot_scatter_log(sorted_df, av_begin_col, av_duration_col, title='Duration timeseries of AwaitingValidation (log)', xlabel=av_begin_label, ylabel=av_duration_label)
plot_scatter_shade(sorted_df, av_begin_col, av_duration_col, title='Duration timeseries of AwaitingValidation (datashading)', xlabel=av_begin_label, ylabel=av_duration_label)
# Looks like captured (truncated) notebook cell output rather than code; kept as a comment so the file parses:
# BokehModel(combine_events=True, render_bundle={'docs_json': {'324d0a54-9539-415c-b44e-dc6a449408f7': {'version…
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
#Unable to generate Scatter Plot using Datashading because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
#Unable to generate Scatter Plot using Datashading because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Histogram plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Box plot because of missing activity Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
#Unable to generate Scatter Plot using Datashading because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# BlockValidationAndCommitPeer1: distribution plots of the activity duration.
bvc_duration_col = 'act_block_validation_and_commit_duration_ms'
bvc_duration_label = 'Duration of Block Validation and Commit (ms)'
plot_histogram(sorted_df, bvc_duration_col, title=bvc_duration_label, xlabel='Duration (ms)')
plot_histogram_log(sorted_df, bvc_duration_col, title='Duration of BlockValidationAndCommitPeer1 activity (log)', xlabel='Duration (ms)')
# Looks like a captured notebook warning rather than code; kept as a comment so the file parses:
# c:\Users\noori\anaconda3\envs\HLF-Jupyter\lib\site-packages\holoviews\plotting\plotly\element.py:480: RuntimeWarning: invalid value encountered in log10
plot_boxplot(sorted_df, bvc_duration_col, title=bvc_duration_label, label='Duration (ms)')
plot_boxplot_log(sorted_df, bvc_duration_col, title='Duration of BlockValidationAndCommitPeer1 (log)', label='Duration (ms)')
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
# Unable to create a Scatter plot because of missing activity Beginning and Duration data.
#Unable to generate Scatter Plot using Datashading because of missing activity Beginning and Duration data.
# Not a Sequential Activity!
# Unable to create Scatter plots because of missing activity Beginning and Duration data.
# Not a Sequential Activity!